# ----------------------------------------------------------------------
# ' This file processes diagnosis data and produces intermediate files:
# ' pain diagnoses for each year-quarter, with pain cluster/subgroup information added.
# ' by BK (bl11@indiana.edu)
# ----------------------------------------------------------------------

load_library = c('bit64','data.table','fst','future.apply','stringr','logger')
invisible(lapply(load_library, function(x) library(x, character.only=TRUE, quietly= TRUE)))
# strongly prefer fixed over scientific notation when numbers are turned into text
options(scipen=999)

# project root; additional reference data is located relative to this path
bucket = file.path('/N','project','iuni_doctorshopping')

# read arguments: <infile> <outfile>
#   infile  : diagnosis file to read (fst format)
#   outfile : path the filtered pain-diagnosis table is written to
args = commandArgs(trailingOnly=TRUE)
if (length(args) < 2) {
	# fail early with a usage message instead of an opaque 'subscript out of bounds'
	stop('expected 2 arguments: <infile> <outfile>, got ', length(args), call.=FALSE)
}
infile = args[[1]]
outfile = args[[2]]
if (!file.exists(infile)) {
	stop('input file does not exist: ', infile, call.=FALSE)
}

# load the diagnosis records from the fst input as a data.table,
# restricted to the columns used downstream
ddata = read_fst(
	path = infile,
	columns = c(
		'PATID', 'PAT_PLANID', 'FST_DT', 'CLMID',
		'DIAG', 'ICD_FLAG', 'LOC_CD', 'POA'
	),
	as.data.table = TRUE
)

#format(object.size(ddata),'Gb')

# read pain diagnosis code list
logger::log_info('now reading pain_dx files')
# DX_CODE is forced to character: with type auto-detection an all-numeric code
# column (e.g. '053.10') would be parsed as a number and lose its leading /
# trailing zeros, so the de-dotted code would no longer match DIAG
pain_dx = fread(
	file.path(bucket,'additional_data','pain_code','pain_dx_list.csv'),
	colClasses = c(DX_CODE='character')
)

# remove decimal points for mapping (literal '.' match, no regex needed)
pain_dx[, DX_CODE := gsub('.', '', DX_CODE, fixed=TRUE)]

# select only relevant columns, one row per distinct code/cluster/sub-category
pain_dx = unique(pain_dx[, c('DX_CODE','DX_CLUSTER','DX_SUB_CATEGORY')])

# inner join on the diagnosis code: rows whose DIAG has no entry in the
# pain code list are dropped; matched rows gain DX_CLUSTER / DX_SUB_CATEGORY
# NOTE(review): the join ignores ICD_FLAG, so codes are matched regardless of
# ICD version -- confirm this is intended
ddata = merge(
	x = ddata,
	y = pain_dx,
	by.x = 'DIAG',
	by.y = 'DX_CODE',
	all = FALSE,
	sort = TRUE
)

# the raw code and ICD version flag are not kept in the output
set(ddata, j = c('ICD_FLAG','DIAG'), value = NULL)

# write the result to the path given on the command line
fwrite(x = ddata, file = outfile)

